Librairies utilisées

library(car)
## Loading required package: carData
library(ggplot2)
library(ggthemes)
library(mgcv)
## Loading required package: nlme
## This is mgcv 1.8-33. For overview type 'help("mgcv-package")'.
library(plot3D)
library(stringr)
require(MASS)
## Loading required package: MASS

Vérification des priors utilisés

Fonction

#Data frame prior
data.frame.prior = function(seq_ne, max_simu){

  #Créer matrice vide qui contiendra les stats
  mat_prior = matrix(data = NA, nrow = length(seq_ne)*max_simu, ncol = 4)
  #Attribution nom colonnes selon appelation stat par MetHis
  colnames(mat_prior) = c("Ne", "simu", "s1", "s2")  
  mat_prior = as.data.frame(mat_prior)
  #Compteur pour indiquer les lignes à remplir
  cpt = 0
  
  for (ne in seq_ne) {
    dir_ne = str_c("Ne", toString(ne), "/") #Répertoire de taille effective
    row_min = cpt*max_simu+1
    row_max = (cpt+1)*max_simu
    mat_prior[row_min:row_max,1] = toString(ne)
    cpt = cpt+1
    
    for (nb_simu in 1:max_simu) {
      mat_prior[row_min+nb_simu-1,2] = nb_simu
      dir_simu = str_c("simu_", toString(nb_simu), "/")
      file_path = str_c(dir_ne, dir_simu, "simu_", nb_simu, ".txt")
      df_tmp = read.table(file_path, header = TRUE)
      mat_prior[row_min+nb_simu-1,3:4] = df_tmp[1,5:6]
    }
  }
  

  #Passage des Ne en facteur
  mat_prior$Ne = factor(as.factor(mat_prior$Ne), levels = as.character(seq_ne))
  #Passage simuulations en entier
  mat_prior$simu = as.integer(mat_prior$simu)  

  return(mat_prior)
}

Fonctions utilisées

Fonction de lecture des stats résumées. La fonction est prévue pour lire les fichiers de stats résumées dans des répertoires de la forme “Ne100/simu_1” (en modifiant les valeurs de Ne et de simulations). Elle renvoie les résultats sous forme de data frame en ajoutant aux statistiques résumées les colonnes correspondant à l’effectif efficace et la simulation.

#Data frame construction
data.frame.stat = function(seq_ne, max_gen, max_simu){

  #Créer matrice vide qui contiendra les stats
  mat_stat = matrix(data = NA, nrow = length(seq_ne)*max_gen*max_simu, ncol = 63)
  mat_stat = as.data.frame(mat_stat)
  #Compteur pour indiquer les lignes à remplir
  cpt = 0

  for (ne in seq_ne) {
    if (dir.exists(str_c("Ne", toString(ne), "/"))) {
      dir_ne = str_c("Ne", toString(ne), "/") #Répertoire de taille effective
      row_min = cpt*max_gen*max_simu+1 #Ligne min. où on écrit les stats pour le Ne donné
      row_max = (cpt+1)*max_gen*max_simu         #Ligne max. où on écrit les stats  
      mat_stat[row_min:row_max,1] = toString(ne) #Ecriture du Ne correspondant pour les stats qui seront écrites
      cpt = cpt+1                             #Incrémentation du compteur
  
      for (nb_simu in 1:max_simu) {
        dir_simu = str_c("simu_", toString(nb_simu), "/")
        row_min_simu = (nb_simu-1)*max_gen + row_min
        row_max_simu = nb_simu*max_gen + row_min -1
        mat_stat[row_min_simu:row_max_simu,2] = nb_simu
        file_path = str_c(dir_ne, dir_simu, "final_sumstats.txt")
        file_stat = read.table(file_path, header = TRUE) #Lecture fichier stat résumées
        #Suppression 1ère colonne, contenant uniquement chiffres pour tri en bash. 
        mat_stat[row_min_simu:row_max_simu,3:63] = file_stat
      }
    }
  }
  

  #Attribution nom colonnes selon appelation stat par MetHis
  colnames(mat_stat) = c("Ne", "simu", names(file_stat))
  #Passage des Ne en facteur
  mat_stat$Ne = factor(as.factor(mat_stat$Ne), levels = as.character(seq_ne))
  #Passage simulations en entier
  mat_stat$simu = as.integer(mat_stat$simu)
  #Passage générations en entier
  mat_stat$Generation = as.integer(mat_stat$Generation)
  #Passage stats en double
  for (numcol in 4:ncol(mat_stat)) {
    mat_stat[,numcol] = as.double(mat_stat[,numcol])
  }
  
  return(mat_stat)
}

Dans le cas où l’on travaille sur de très nombreuses simulations correspondant à un effectif efficace, cette fonction permet de calculer les statistiques moyennées pour les valeurs de Ne étudiées.

#Passage en moyenne
data.frame.mean = function(df_stat, max_gen, seq_ne){
  
  lrow = length(seq_ne)
  df_mean = matrix(data = NA, nrow = lrow*max_gen, ncol = 62 )
  df_mean = as.data.frame(df_mean)
  cpt = 0

  for (ne in seq_ne) {
    df_tmp = df_stat[which(df_stat$Ne == ne),]
    
    row_min = cpt*max_gen+1
    row_max = (cpt+1)*max_gen
    df_mean[row_min:row_max, 1] = toString(ne) 
    cpt = cpt+1
    
    for (gen in 0:(max_gen-1) ) {
      df_mean[row_min+gen, 2] = gen
      vec_tmp = apply(df_tmp[which(df_tmp$Generation == gen), 4:63], 2, mean)
      df_mean[row_min+gen, 3:62] = vec_tmp
    }
  }
  
  colnames(df_mean) = colnames(df_stat)[-2]
  df_mean$Ne = factor(as.factor(df_mean$Ne), levels = seq_ne )
  return(df_mean)
}

Fonction d’affichage des graphiques en 2D, avec un background blanc et les lignes horizontales de quadrillage affichées. Cette fonction permet d’afficher les lignes de régression ou les lignes reliant les points.

#Plot function
#Affichage d'une stat au cours des générations
plot_stat_gen = function(df, gen, stat, group_col, color_col, titre, ligne = FALSE, legd = TRUE,
                         size_point = 0.1, line_t = "solid"){
  p = ggplot(df, aes(x = gen, y = stat,
                     group = group_col, color = color_col)) + ggtitle(titre)
  
  if (ligne) {
    p = p + geom_point(size = size_point, show.legend = legd)+
      geom_line(aes(linetype = line_t), show.legend = legd)
  }else{
    #smooth de la courbe pour obtenir régression et tempérer variabilité due au TCL
    p = p + geom_point(size = size_point, show.legend = legd)+ geom_smooth(show.legend = legd)
  }
  p = p + theme_classic()
  p = p + theme(panel.grid.major.y = element_line(size = 0.5,
                                                  linetype = 'solid',
                                                  colour = "#BABABA"),
                panel.grid.minor.y = element_line(size = 0.25,
                                                  linetype = 'solid',
                                                  colour = "#CECECE"))
  #print(p)
  return(p)
}

Cette fonction d’affichage de graphique reprend la précédente, mais sans affficher les points.

#Plot function
#Affichage d'une stat au cours des générations
plot_without_point = function(df, gen, stat, color_col, titre, legd = TRUE){
  p = ggplot(df, aes(x = gen, y = stat, color = color_col)) + ggtitle(titre)
  
    #smooth de la courbe pour obtenir régression et tempérer variabilité due au TCL
  p = p + geom_smooth(show.legend = legd)
  p = p + theme_classic()
  p = p + theme(panel.grid.major.y = element_line(size = 0.5,
                                                  linetype = 'solid',
                                                  colour = "#BABABA"),
                panel.grid.minor.y = element_line(size = 0.25,
                                                  linetype = 'solid',
                                                  colour = "#CECECE"))
  #print(p)
  return(p)
}

Fonction d’extraction d’une sous-matrice à partir d’une matrice principale en fonction de valeurs choisies^ de Ne.

extract_sub_mat = function(all_mat, list_ne){
  all_rows = c()
  for (ne in as.character(list_ne)) {
    all_rows = append(all_rows, which(all_mat[,1] == ne))
  }
  return(all_mat[all_rows,])
}

Population constante de tailles initiales différentes

Calcul des matrices de stats

seq_1024_16384 = 2**seq(10,14,1)
seq_50_1000 = seq(50,1000,1)
seq_tot = sort(c(seq_50_1000, seq_1024_16384))

mat_tot = data.frame.stat(seq_tot, 101, 1)
mat_50_1000 = extract_sub_mat(mat_tot, c(seq(50,200,10), seq(200,1000,100)))
mat_1024_16384 = extract_sub_mat(mat_tot, 2**seq(6,14,1))

#mat_1024_16384_mean = data.frame.mean(mat_1024_16384, 101, seq_1024_16384)

Graphiques obtenus : affichage de l’évolution d’une statistique donnée en fonction des générations colorée selon la Ne initiale.

p_tmp = plot_stat_gen(mat_tot, mat_tot$Generation, 
                      mat_tot$mean.het.adm, mat_tot$Ne, mat_tot$Ne,
                      "Stat en fonction des générations\n selon différentes Ne initiales",
                      legd = FALSE)
p_tmp = p_tmp+geom_hline(yintercept = mat_50_1000$mean.het.s1[1], color = "red")
p_tmp = p_tmp+geom_hline(yintercept = mat_50_1000$mean.het.s2[1], color = "black")
print(p_tmp)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

p_tmp = plot_stat_gen(mat_50_1000, mat_50_1000$Generation, 
                      mat_50_1000$mean.het.adm, mat_50_1000$Ne, mat_50_1000$Ne,
                      "Stat en fonction des générations\nselon différentes Ne initiales",
                      legd = TRUE)
p_tmp = p_tmp+geom_hline(yintercept = mat_50_1000$mean.het.s1[1], color = "red")
p_tmp = p_tmp+geom_hline(yintercept = mat_50_1000$mean.het.s2[1], color = "black")
print(p_tmp)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

plot_stat_gen(mat_1024_16384, mat_1024_16384$Generation, 
              mat_1024_16384$mean.het.adm, mat_1024_16384$Ne, mat_1024_16384$Ne,
              "Stat en fonction des générations\n selon différentes Ne initiales")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

plot_stat_gen(mat_tot, mat_tot$Generation, 
              mat_tot$Fst.s1.adm, mat_tot$Ne,mat_tot$Ne,
              "Stat en fonction des générations\n selon différentes Ne initiales",
              legd = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

plot_stat_gen(mat_50_1000, mat_50_1000$Generation, 
              mat_50_1000$Fst.s1.adm, mat_50_1000$Ne,mat_50_1000$Ne,
              "Stat en fonction des générations\n selon différentes Ne initiales")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

plot_stat_gen(mat_1024_16384, mat_1024_16384$Generation, 
              mat_1024_16384$Fst.s1.adm, mat_1024_16384$Ne,mat_1024_16384$Ne,
              "Stat en fonction des générations\n selon différentes Ne initiales")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Populations croissantes

Réutilisation de la fonction de lecture de stats résumées pour des populations croissantes. Celles-ci sont enregistrées dans des répertoires de la forme “Ne100-XXX/Ne100-1000/simu_1” Fonction de lecture des stats résumées. La fonction est prévue pour lire les fichiers de stats résumées dans des répertoires de la forme “NeXXX/simu_XXX” (avec XXX remplacés par les valeurs de Ne et de simulations). Elle renvoie les résultats sous forme de data frame en ajoutant aux statistiques résumées les colonnes correspondant à l’effectif efficace et la simulation.

data.frame.increase = function(seq_ne_ini, seq_combi){
  for (ne_ini in seq_ne_ini) {
    if (dir.exists(str_c("Ne", ne_ini, "-XXX/"))) {
      dir_ne = str_c("Ne", ne_ini, "-XXX/")
      setwd(dir_ne)
      motif_detect = str_c("^",ne_ini,"-")
      vec_ne_tmp = seq_combi[which(str_detect(seq_combi, motif_detect))]
      mat_tmp = data.frame.stat(seq_ne = vec_ne_tmp, max_gen = 101, max_simu = 1)
      if (ne_ini == seq_ne_ini[1]) {
        mat_pop_inc = mat_tmp
      }else{
        mat_pop_inc = rbind(mat_pop_inc, mat_tmp)
      }
      setwd("../")
    }
  }
  return(na.omit(mat_pop_inc))
}

Lecture des data frame pour les combinaison de Ne entre ne0 (ne_ini) et nef (ne_fin). On extrait ensuite les stats pour lesquelles on a un nef de 10000.

setwd("../new_methis_pop_increase_50000_snp/")
seq_ne_ini = seq(50,100,2)
seq_ne_fin = seq(100, 10000, 100)
seq_combi = as.data.frame(outer(seq_ne_ini, seq_ne_fin, FUN="paste", sep="-"))
seq_combi = as.data.frame.vector(unlist(seq_combi))[[1]]

mat_pop_inc = data.frame.increase(seq_ne_ini, seq_combi)


mat_end_10000 = extract_sub_mat(mat_pop_inc,
                                as.data.frame(outer(seq(50,100,4),
                                                    c("10000"),
                                                    FUN="paste",
                                                    sep="-"))[,1])
plot_stat_gen(mat_pop_inc, mat_pop_inc$Generation, 
              mat_pop_inc$f3, mat_pop_inc$Ne,mat_pop_inc$Ne,
              "Stat en fonction des générations\n selon différentes Ne initiales",
              FALSE, legd = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

plot_stat_gen(mat_pop_inc, mat_pop_inc$Generation, 
              mat_pop_inc$mean.het.adm, mat_pop_inc$Ne,mat_pop_inc$Ne,
              "Stat en fonction des générations\n selon différentes Ne initiales",
              FALSE, legd = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

plot_stat_gen(mat_end_10000, mat_end_10000$Generation, 
              mat_end_10000$mean.het.adm, mat_end_10000$Ne,mat_end_10000$Ne,
              "Moyenne hétérozygotie en fonction des générations\n selon différentes Ne initiales",
              legd = TRUE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

plot_stat_gen(mat_end_10000, mat_end_10000$Generation, 
              mat_end_10000$f3, mat_end_10000$Ne,mat_end_10000$Ne,
              "F3 en fonction des générations\n selon différentes Ne initiales",
              legd = TRUE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Variation Nu

setwd("../new_methis_pop_increase_50000_snp_Nu_var/")
seq_nu = sprintf("%.2f", seq(0.05, 0.45, 0.05))
seq_ne_ini = seq(50,100,10)
seq_ne_fin = seq(100, 10000, 200)
seq_combi = as.data.frame(outer(seq_ne_ini, seq_ne_fin, FUN="paste", sep="-"))
seq_combi = as.data.frame.vector(unlist(seq_combi))[[1]]

list_df_nu = list()
cpt = 1

for (nu in seq_nu) {
  if(dir.exists(str_c("Nu", nu ))){
    dir_nu = str_c("Nu", nu, "/")
    setwd(dir_nu)
    list_df_nu[[cpt]] = data.frame.increase(seq_ne_ini, seq_combi)
    cpt = cpt+1
    setwd("../")
  }
}

for (combi_lst in 1:length(list_df_nu)) {
  if (combi_lst == 1) {
    mat_pop_nu = list_df_nu[[combi_lst]]
    mat_pop_nu = data.frame(mat_pop_nu, Nu = seq_nu[combi_lst])
  }else{
    mat_tmp = list_df_nu[[combi_lst]]
    mat_tmp = data.frame(mat_tmp, Nu = seq_nu[combi_lst])
    mat_pop_nu = rbind(mat_pop_nu, mat_tmp)
  }
}

mat_pop_nu$Nu = as.factor(mat_pop_nu$Nu)
plot_without_point(mat_pop_nu, mat_pop_nu$Generation, 
                   mat_pop_nu$mean.het.adm, mat_pop_nu$Ne,
                   "Stat en fonction des générations\n selon différentes Ne initiales\n et couleurs selon nu",
                   legd = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

plot_without_point(mat_pop_nu, mat_pop_nu$Generation, 
                   mat_pop_nu$mean.het.adm, mat_pop_nu$Nu,
                   "Stat en fonction des générations\n selon différentes Ne initiales\n et couleurs selon nu",
                   legd = TRUE)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# plot_stat_gen(mat_pop_nu, mat_pop_nu$Generation,
#              mat_pop_nu$mean.het.adm, mat_pop_nu$Ne,
#              "Stat en fonction des générations\n selon différentes Ne initiales",
#              TRUE, legd = FALSE)
# plot_stat_gen(mat_pop_nu, mat_pop_nu$Generation,
#              mat_pop_nu$mean.het.adm, mat_pop_nu$Nu,
#              "Stat en fonction des générations\n selon différentes Ne initiales",
#              TRUE, legd = FALSE)
for (rg_list in 1:length(list_df_nu)) {
  p_tmp = plot_stat_gen(list_df_nu[[rg_list]], list_df_nu[[rg_list]]$Generation, 
                        list_df_nu[[rg_list]]$mean.het.adm, list_df_nu[[rg_list]]$Ne,
                        list_df_nu[[rg_list]]$Ne,
                        str_c("Nu = ",seq_nu[rg_list]), TRUE,legd = FALSE)
  p_tmp = p_tmp+geom_hline(yintercept = list_df_nu[[rg_list]]$mean.het.s1[1],
                           color = "red")
  p_tmp = p_tmp+geom_hline(yintercept = list_df_nu[[rg_list]]$mean.het.s2[1],
                           color = "black")
  print(p_tmp)

}

for (rg_list in 1:length(list_df_nu)) {
  
  list_ne = list_df_nu[[rg_list]]$Ne[which(list_df_nu[[rg_list]]$Generation == 100)]
  list_ne = as.integer(unlist(str_split(list_ne, "-")))
  list_ne0 = list_ne[seq(1, length(list_ne), 2)]
  list_nef = list_ne[seq(2, length(list_ne), 2)]
  mat_tmp = list_df_nu[[rg_list]][which(list_df_nu[[rg_list]]$Generation == 100),]
  
  scatter3D(x = list_ne0, y = list_nef, z = mat_tmp$mean.het.adm,
            bty = "b2", pch = 16, ticktype = "detailed", main = seq_nu[rg_list],
            theta = 120, phi = 0, type = "s", cex.lab = 0.6, cex.axis = 0.5,
            xlab = "Ne0", ylab = "Nef", zlab = "Stat")

}

grid.lines = 50
fit <- loess(mat_tmp$mean.het.adm ~ list_nef + list_ne0)
x.pred <- seq(min(list_ne0), max(list_ne0), length.out = grid.lines)
y.pred <- seq(min(list_nef), max(list_nef), length.out = grid.lines)
xy <- expand.grid( x = x.pred, y = y.pred)
z.pred <- matrix(predict(fit, newdata = xy), 
                 nrow = grid.lines, ncol = grid.lines)
## Warning: 'newdata' had 2500 rows but variables found have 300 rows
fitpoints = predict(fit)
scatter3D(x = list_ne0, y = list_nef, z = mat_tmp$mean.het.adm,
          bty = "b2", pch = 16, ticktype = "detailed", main = seq_nu[rg_list],
          theta = 150, phi = 0, type = "s",cex = 0.5, cex.lab = 0.6, cex.axis = 0.5,
          xlab = "Ne0", ylab = "Nef", zlab = "Stat",
          surf = list(x = x.pred, y = y.pred, z = z.pred,
                      facets = NA))

Calcul matrice pour connaître Ne à chaque génération.

setwd("../new_methis_pop_increase_50000_snp_Nu_var/")

mat_ne_inc = data.frame()
cpt = 1

for (nu in seq_nu) {
  if(dir.exists(str_c("Nu", nu ))){
    dir_nu = str_c("Nu", nu, "/")
    setwd(dir_nu)
    for (ne_ini in seq_ne_ini) {
      if (dir.exists(str_c("Ne", ne_ini, "-XXX/"))) {
        dir_ne = str_c("Ne", ne_ini, "-XXX/")
        setwd(dir_ne)
        seq_combi = as.data.frame(outer(ne_ini, seq_ne_fin,
                                        FUN="paste", sep="-"))
        seq_combi = as.data.frame.vector(unlist(seq_combi))[[1]]
        for (combi in seq_combi) {
          if (dir.exists(str_c("Ne", combi))) {
            dir_ne_bis = str_c("Ne", combi, "/simu_1/")
            setwd(dir_ne_bis)
            cpt = cpt + 1
            file_tmp = read.table("simu_1.par", header = TRUE)
            if (nu == seq_nu[1] && ne_ini==seq_ne_ini[1] && combi == seq_combi[1]) {
              mat_ne_inc = as.data.frame(file_tmp[,2])
            }else{
              
              mat_ne_inc = cbind(mat_ne_inc, file_tmp[,2])
            }
            
            setwd("../../")
          }
        }
        setwd("../")
      }
    }
    setwd("../")

  }
}
mat_pop_nu = data.frame(mat_pop_nu,
                        ne_gen = unlist(mat_ne_inc , use.names = F),
                        n0 = rep(NA, nrow(mat_pop_nu)),
                        nf = rep(NA, nrow(mat_pop_nu)) )

for (i in 1:nrow(mat_pop_nu)) {
  vec_tmp = as.integer(unlist(str_split(mat_pop_nu$Ne[i], "-")))
  mat_pop_nu$n0[i] = vec_tmp[1]
  mat_pop_nu$nf[i] = vec_tmp[2]
}
for (rg_nu in seq_nu) {
  rg_nu
  df_tmp = mat_pop_nu[which(mat_pop_nu$Nu == rg_nu),]
  
  grid.lines = 30
  fit <- loess(df_tmp$mean.het.adm ~ df_tmp$Generation + as.numeric(df_tmp$ne_gen))
  x.pred <- seq(min(df_tmp$Generation), max(df_tmp$Generation), length.out = grid.lines)
  y.pred <- seq(min(as.numeric(df_tmp$ne_gen)),
                max(as.numeric(df_tmp$ne_gen)), length.out = grid.lines)
  xy <- expand.grid( x = x.pred, y = y.pred)
  z.pred <- matrix(predict(fit, newdata = xy), 
                 nrow = grid.lines, ncol = grid.lines)
  
  scatter3D(x = df_tmp$Generation, y = df_tmp$ne_gen, z = df_tmp$mean.het.adm,
            bty = "b2", pch = 16, ticktype = "detailed", main = seq_nu[rg_nu],
            theta = 210, phi = 20, type = "s",cex = 0.2, cex.lab = 0.6, cex.axis = 0.5,
            xlab = "Gen", ylab = "Ne", zlab = "Stat",
            surf = list(x = x.pred, y = y.pred, z = z.pred,facets = NA))
}
## Warning: 'newdata' had 900 rows but variables found have 30300 rows

## Warning: 'newdata' had 900 rows but variables found have 30300 rows

## Warning: 'newdata' had 900 rows but variables found have 30300 rows

## Warning: 'newdata' had 900 rows but variables found have 30300 rows

## Warning: 'newdata' had 900 rows but variables found have 30300 rows

## Warning: 'newdata' had 900 rows but variables found have 30300 rows

## Warning: 'newdata' had 900 rows but variables found have 30300 rows

## Warning: 'newdata' had 900 rows but variables found have 30300 rows

## Warning: 'newdata' had 900 rows but variables found have 30300 rows

seq_extract = c("0.05", "0.25", "0.45")
df_extract = mat_pop_nu[which(mat_pop_nu$Nu == seq_extract),]
scatter3D(x = mat_pop_nu$Generation, y = mat_pop_nu$ne_gen,
          z = as.numeric(as.character(mat_pop_nu$Nu)),
          bty = "b2", pch = 16, ticktype = "detailed",
          theta = 210, phi = 50, type = "s", cex.lab = 0.6, cex.axis = 0.5,
          xlab = "Gen", ylab = "Ne", zlab = "Nu")

setwd("../new_methis_pop_increase_50000_snp_Nu_var/")
seq_nu_bis = sprintf("%.2f", c(0.01, 0.25, 0.49))
seq_ne_ini = c(100)
seq_ne_fin = seq(1000, 10000, 1000)
seq_combi = as.data.frame(outer(seq_ne_ini, seq_ne_fin, FUN="paste", sep="-"))
seq_combi = as.data.frame.vector(unlist(seq_combi))[[1]]

df_nu_reduce = data.frame()
cpt = 1

for (nu in seq_nu_bis) {
  if(dir.exists(str_c("Nu", nu ))){
    dir_nu = str_c("Nu", nu, "/")
    setwd(dir_nu)
    
    df_tmp = data.frame.increase(seq_ne_ini, seq_combi)
    col_nu = as.integer(rep(nu, nrow(df_tmp)))
    col_cpt = rep(cpt, nrow(df_tmp))
    col_n0 = rep(NA, nrow(df_tmp))
    col_nf = rep(NA, nrow(df_tmp))
    cpt = cpt + 1
    
    if (nu == seq_nu_bis[1]) {
      df_nu_reduce = data.frame(df_tmp, nu = col_nu, cpt = col_cpt,
                                n0 = col_n0, nf = col_nf)
    }else{
      df_tmp = data.frame(df_tmp, nu = col_nu, cpt = col_cpt,
                          n0 = col_n0, nf = col_nf)
      df_nu_reduce = rbind(df_nu_reduce, df_tmp)
    }
    setwd("../")
  }
}

for (i in 1:nrow(df_nu_reduce)) {
  vec_tmp = as.integer(unlist(str_split(df_nu_reduce$Ne[i], "-")))
  df_nu_reduce$n0[i] = vec_tmp[1]
  df_nu_reduce$nf[i] = vec_tmp[2]
}
ne_gen = data.frame()
cpt = 1

for (nu in seq_nu_bis) {
  if(dir.exists(str_c("Nu", nu ))){
    dir_nu = str_c("Nu", nu, "/")
    setwd(dir_nu)
    for (ne_ini in seq_ne_ini) {
      if (dir.exists(str_c("Ne", ne_ini, "-XXX/"))) {
        dir_ne = str_c("Ne", ne_ini, "-XXX/")
        setwd(dir_ne)
        seq_combi = as.data.frame(outer(ne_ini, seq_ne_fin,
                                        FUN="paste", sep="-"))
        seq_combi = as.data.frame.vector(unlist(seq_combi))[[1]]
        for (combi in seq_combi) {
          if (dir.exists(str_c("Ne", combi))) {
            dir_ne_bis = str_c("Ne", combi, "/simu_1/")
            setwd(dir_ne_bis)
            cpt = cpt + 1
            file_tmp = read.table("simu_1.par", header = TRUE)
            if (nu == seq_nu_bis[1] && ne_ini==seq_ne_ini[1] && combi == seq_combi[1]) {
              ne_gen = as.data.frame(file_tmp[,2])
            }else{
              ne_gen = rbind(ne_gen, file_tmp[,2])
            }
            
            setwd("../../")
          }
        }
        setwd("../")
      }
    }
    setwd("../")

  }
}
df_nu_reduce_gen_100 = df_nu_reduce[which(df_nu_reduce$Generation == 100),]

scatter3D(x = df_nu_reduce$Generation, y = df_nu_reduce$nf,
          z = df_nu_reduce$mean.het.adm,
          ticktype = "detailed",bty = "b2",pch = 16,
          theta = 210, phi = 20, type = "s", cex.lab = 0.6, cex.axis = 0.5,
          xlab = "generation", ylab = "Nf", zlab = "stat")

  # scatter3D(x = df_tmp$Generation, y = vec_tot, z = df_tmp$mean.het.adm,
  #           bty = "b2", pch = 16, ticktype = "detailed", main = seq_nu[rg_nu],
  #           theta = 210, phi = 20, type = "s", cex.lab = 0.6, cex.axis = 0.5,
  #           xlab = "Gen", ylab = "Ne", zlab = "Stat")

Bottleneck

Nu fixé, alpha variable, N0 fixé, Nf fixé

setwd("../new_methis_bottleneck_50000_snp/")

seq_alpha = c(0.1, 0.5, 0.9)
seq_nu_red = c(0.01, 0.25, 0.49)
seq_bottle = seq(10, 90, 10)
seq_ne0 = c(1000)
seq_nef = c(1000)

seq_combi = as.data.frame(outer(seq_ne0, seq_nef, FUN="paste", sep="-"))
seq_combi = as.data.frame.vector(unlist(seq_combi))[[1]]
cpt = 1

for (alpha in seq_alpha) {
  for (nu in seq_nu_red) {
    for (bottle in seq_bottle) {
      change_dir = str_c("alpha", alpha, "/Nu", nu, "/bottle",bottle,"/")
      setwd(change_dir)
      if (alpha==seq_alpha[1] & nu==seq_nu_red[1] & bottle==seq_bottle[1]) {
        mat_bottle = data.frame.increase(seq_ne0, seq_combi)
        col_alpha = rep(alpha, nrow(mat_bottle))
        col_nu = rep(nu, nrow(mat_bottle))
        col_bottle = rep(bottle, nrow(mat_bottle))
        col_cpt = rep(cpt, nrow(mat_bottle))
        mat_bottle = data.frame(mat_bottle, alpha = col_alpha,
                                U = col_nu, time_botl = col_bottle, cpt = col_cpt)
      }else{
        mat_tmp = data.frame.increase(seq_ne0, seq_combi)
        col_alpha = rep(alpha, nrow(mat_tmp))
        col_nu = rep(nu, nrow(mat_tmp))
        col_bottle = rep(bottle, nrow(mat_tmp))
        col_cpt = rep(cpt, nrow(mat_tmp))
        mat_tmp = data.frame(mat_tmp, alpha = col_alpha,
                             U = col_nu, time_botl = col_bottle, cpt = col_cpt)
        mat_bottle = rbind(mat_bottle, mat_tmp)
      }
      cpt = cpt + 1
      setwd("../../../")
    }
  }
}

mat_bottle$alpha = as.factor(mat_bottle$alpha)
mat_bottle$U = as.factor(mat_bottle$U)
mat_bottle$time_botl = as.factor(mat_bottle$time_botl)
mat_bottle$cpt = as.factor(mat_bottle$cpt)

mat_bottle_time_50 = mat_bottle[which(mat_bottle$time_botl == 50),]
mat_bottle_time_20 = mat_bottle[which(mat_bottle$time_botl == 20),]
mat_bottle_time_80 = mat_bottle[which(mat_bottle$time_botl == 80),]
plot_without_point(mat_bottle, mat_bottle$Generation, 
              mat_bottle$mean.het.adm, mat_bottle$alpha,
              "Stat en fonction des générations\n selon différents bottleneck",
              legd = TRUE)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

p = plot_stat_gen(mat_bottle_time_20, mat_bottle_time_20$Generation, 
              mat_bottle_time_20$mean.het.adm, mat_bottle_time_20$alpha,
              mat_bottle_time_20$alpha,
              "Stat en fonction des générations\n selon différents bottleneck",FALSE,
              legd = TRUE)
p = p+ geom_vline(xintercept = 1, size = 0.4, color = "orange")+
  geom_vline(xintercept = 21, size = 0.4, color = "orange")
print(p)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

p = plot_stat_gen(mat_bottle_time_50, mat_bottle_time_50$Generation, 
              mat_bottle_time_50$mean.het.adm, mat_bottle_time_50$alpha,
              mat_bottle_time_50$alpha,
              "Stat en fonction des générations\n selon différents bottleneck",FALSE,
              legd = TRUE)
p = p+ geom_vline(xintercept = 1, size = 0.4, color = "orange")+
  geom_vline(xintercept = 51, size = 0.4, color = "orange")
print(p)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

p = plot_stat_gen(mat_bottle_time_80, mat_bottle_time_80$Generation, 
              mat_bottle_time_80$mean.het.adm, mat_bottle_time_80$alpha,
              mat_bottle_time_80$alpha,
              "Stat en fonction des générations\n selon différents bottleneck",FALSE,
              legd = TRUE)
p = p+ geom_vline(xintercept = 1, size = 0.4, color = "orange")+
  geom_vline(xintercept = 81, size = 0.4, color = "orange")
print(p)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

p = plot_stat_gen(mat_bottle_time_80, mat_bottle_time_80$Generation, 
              mat_bottle_time_80$mean.het.adm, mat_bottle_time_80$cpt,
              mat_bottle_time_80$alpha, size_point = 0,
              "Stat en fonction des générations\nbottleneck 80 N0 1000 Nf 1000",TRUE,
              legd = TRUE, line_t = mat_bottle_time_80$U)
p = p+ geom_vline(xintercept = 1, size = 0.4, color = "orange")+
  geom_vline(xintercept = 81, size = 0.4, color = "orange")+
  labs(color = "alpha", linetype = "U")


print(p)

vec_tmp = c()
vec_tmp[which(mat_bottle$alpha == 0.1)] = 2
vec_tmp[which(mat_bottle$alpha == 0.5)] = 3
vec_tmp[which(mat_bottle$alpha == 0.9)] = 4

scatter3D(x = mat_bottle$Generation, y = as.numeric(as.character(mat_bottle$time_botl)),
          z = mat_bottle$mean.het.adm, col = vec_tmp,
          ticktype = "detailed",bty = "b2",pch = 16,
          theta = 360, phi = 0, type = "l",cex = 0.2, cex.lab = 0.6, cex.axis = 0.5,
          xlab = "generation", ylab = "bottle", zlab = "stat")